In [60]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from scipy.stats import zscore
from sklearn.tree import DecisionTreeClassifier
from statsmodels.stats.outliers_influence import variance_inflation_factor
from scipy.stats import shapiro
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import cross_val_score, KFold,StratifiedKFold, LeaveOneOut
from sklearn.decomposition import PCA
from sklearn.naive_bayes import GaussianNB
from scipy.stats import randint
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import pickle
from sklearn.ensemble import BaggingClassifier,AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
In [96]:
# !pip install 
# !pip install mxnet-mkl==1.6.0 numpy==1.23.1
# ! pip install imblearn

Q1 A - Import ‘signal-data.csv’ as DataFrame.

In [2]:
data = pd.read_csv("/workspaces/GreatLearning/Featurization, Model Selection & Tuning/Project/signal-data.csv")
In [4]:
data.head()
Out[4]:
Time 0 1 2 3 4 5 6 7 8 ... 581 582 583 584 585 586 587 588 589 Pass/Fail
0 2008-07-19 11:55:00 3030.93 2564.00 2187.7333 1411.1265 1.3602 100.0 97.6133 0.1242 1.5005 ... NaN 0.5005 0.0118 0.0035 2.3630 NaN NaN NaN NaN -1
1 2008-07-19 12:32:00 3095.78 2465.14 2230.4222 1463.6606 0.8294 100.0 102.3433 0.1247 1.4966 ... 208.2045 0.5019 0.0223 0.0055 4.4447 0.0096 0.0201 0.0060 208.2045 -1
2 2008-07-19 13:17:00 2932.61 2559.94 2186.4111 1698.0172 1.5102 100.0 95.4878 0.1241 1.4436 ... 82.8602 0.4958 0.0157 0.0039 3.1745 0.0584 0.0484 0.0148 82.8602 1
3 2008-07-19 14:43:00 2988.72 2479.90 2199.0333 909.7926 1.3204 100.0 104.2367 0.1217 1.4882 ... 73.8432 0.4990 0.0103 0.0025 2.0544 0.0202 0.0149 0.0044 73.8432 -1
4 2008-07-19 15:22:00 3032.24 2502.87 2233.3667 1326.5200 1.5334 100.0 100.3967 0.1235 1.5031 ... NaN 0.4800 0.4766 0.1045 99.3032 0.0202 0.0149 0.0044 73.8432 -1

5 rows × 592 columns

Q1 B - Print 5 point summary and share at least 2 observations.

In [5]:
data.describe()
Out[5]:
0 1 2 3 4 5 6 7 8 9 ... 581 582 583 584 585 586 587 588 589 Pass/Fail
count 1561.000000 1560.000000 1553.000000 1553.000000 1553.000000 1553.0 1553.000000 1558.000000 1565.000000 1565.000000 ... 618.000000 1566.000000 1566.000000 1566.000000 1566.000000 1566.000000 1566.000000 1566.000000 1566.000000 1567.000000
mean 3014.452896 2495.850231 2200.547318 1396.376627 4.197013 100.0 101.112908 0.121822 1.462862 -0.000841 ... 97.934373 0.500096 0.015318 0.003847 3.067826 0.021458 0.016475 0.005283 99.670066 -0.867262
std 73.621787 80.407705 29.513152 441.691640 56.355540 0.0 6.237214 0.008961 0.073897 0.015116 ... 87.520966 0.003404 0.017180 0.003720 3.578033 0.012358 0.008808 0.002867 93.891919 0.498010
min 2743.240000 2158.750000 2060.660000 0.000000 0.681500 100.0 82.131100 0.000000 1.191000 -0.053400 ... 0.000000 0.477800 0.006000 0.001700 1.197500 -0.016900 0.003200 0.001000 0.000000 -1.000000
25% 2966.260000 2452.247500 2181.044400 1081.875800 1.017700 100.0 97.920000 0.121100 1.411200 -0.010800 ... 46.184900 0.497900 0.011600 0.003100 2.306500 0.013425 0.010600 0.003300 44.368600 -1.000000
50% 3011.490000 2499.405000 2201.066700 1285.214400 1.316800 100.0 101.512200 0.122400 1.461600 -0.001300 ... 72.288900 0.500200 0.013800 0.003600 2.757650 0.020500 0.014800 0.004600 71.900500 -1.000000
75% 3056.650000 2538.822500 2218.055500 1591.223500 1.525700 100.0 104.586700 0.123800 1.516900 0.008400 ... 116.539150 0.502375 0.016500 0.004100 3.295175 0.027600 0.020300 0.006400 114.749700 -1.000000
max 3356.350000 2846.440000 2315.266700 3715.041700 1114.536600 100.0 129.252200 0.128600 1.656400 0.074900 ... 737.304800 0.509800 0.476600 0.104500 99.303200 0.102800 0.079900 0.028600 737.304800 1.000000

8 rows × 591 columns

In [6]:
data.shape
Out[6]:
(1567, 592)
In [7]:
data.dtypes
Out[7]:
Time          object
0            float64
1            float64
2            float64
3            float64
              ...   
586          float64
587          float64
588          float64
589          float64
Pass/Fail      int64
Length: 592, dtype: object

Observations

Column 0:

Minimum value = 2746 First Quartile (Q1) = 2966 Median (Second Quartile or Q2) = 3011 Third Quartile (Q3)= 3056 Maxmimum value = 3356

Column 1:

Minimum value = 2158 First Quartile (Q1) = 2452 Median (Second Quartile or Q2) = 2499 Third Quartile (Q3)= 2538 Maxmimum value = 2846

Column 588:

Minimum value = 0.001 First Quartile (Q1) = 0.003 Median (Second Quartile or Q2) = 0.004 Third Quartile (Q3)= 0.006 Maxmimum value = 0.02

Column 589:

Minimum value = 0 First Quartile (Q1) = 44 Median (Second Quartile or Q2) = 71 Third Quartile (Q3)= 114 Maxmimum value = 737

Q2 A - Write a for loop which will remove all the features with 20%+ Null values and impute rest with mean of the feature.

In [3]:
for column in data.columns:

    null_percentage = data[column].isnull().sum()/len(data[column])

    if null_percentage >= 0.2:
        data.drop(column, axis=1, inplace=True)
        # print(null_percentage,column)
    else:
        if data[column].dtypes == "O":
            mode_value = data[column].mode()
            # print("Number of NaN values before filling:", data[column].isna().sum())
            data[column].fillna(mode_value, inplace=True)
            # print("Number of NaN values after filling:", data[column].isna().sum())
            # print()

        else:
            mean_value = data[column].mean()
            # print("Number of NaN values after filling:", data[column].isna().sum())
            data[column].fillna(mean_value, inplace=True)
            # print("Number of NaN values after filling:", data[column].isna().sum())
            # print()
In [9]:
data.shape
Out[9]:
(1567, 560)

Q2 B - Identify and drop the features which are having same value for all the rows.

In [4]:
features_with_same_valuie = data.columns[data.nunique() == 1]
data.drop(columns=features_with_same_valuie, inplace=True)
In [11]:
print('After dropping features having same value for all the rows (rows,columns) =',data.shape)
# data.shape
After dropping features having same value for all the rows (rows,columns) = (1567, 444)

Q2 C - Drop other features if required using relevant functional knowledge. Clearly justify the same.

In [5]:
data.drop(columns=['Time'], inplace=True)

Dropping time columns since it won't help to predict the target column

Q2 D - Check for multi-collinearity in the data and take necessary action.

In [7]:
data.shape
Out[7]:
(1567, 443)
In [14]:
data.head()
Out[14]:
0 1 2 3 4 6 7 8 9 10 ... 577 582 583 584 585 586 587 588 589 Pass/Fail
0 3030.93 2564.00 2187.7333 1411.1265 1.3602 97.6133 0.1242 1.5005 0.0162 -0.0034 ... 14.9509 0.5005 0.0118 0.0035 2.3630 0.021458 0.016475 0.005283 99.670066 -1
1 3095.78 2465.14 2230.4222 1463.6606 0.8294 102.3433 0.1247 1.4966 -0.0005 -0.0148 ... 10.9003 0.5019 0.0223 0.0055 4.4447 0.009600 0.020100 0.006000 208.204500 -1
2 2932.61 2559.94 2186.4111 1698.0172 1.5102 95.4878 0.1241 1.4436 0.0041 0.0013 ... 9.2721 0.4958 0.0157 0.0039 3.1745 0.058400 0.048400 0.014800 82.860200 1
3 2988.72 2479.90 2199.0333 909.7926 1.3204 104.2367 0.1217 1.4882 -0.0124 -0.0033 ... 8.5831 0.4990 0.0103 0.0025 2.0544 0.020200 0.014900 0.004400 73.843200 -1
4 3032.24 2502.87 2233.3667 1326.5200 1.5334 100.3967 0.1235 1.5031 -0.0031 -0.0072 ... 10.9698 0.4800 0.4766 0.1045 99.3032 0.020200 0.014900 0.004400 73.843200 -1

5 rows × 443 columns

In [6]:
data_matrix = data.corr().abs()
print(data_matrix)
                  0         1         2         3         4         6  \
0          1.000000  0.143840  0.004756  0.007613  0.011014  0.002270   
1          0.143840  1.000000  0.005767  0.007568  0.001636  0.025564   
2          0.004756  0.005767  1.000000  0.298935  0.095891  0.136225   
3          0.007613  0.007568  0.298935  1.000000  0.058483  0.685835   
4          0.011014  0.001636  0.095891  0.058483  1.000000  0.074368   
...             ...       ...       ...       ...       ...       ...   
586        0.018443  0.009403  0.025495  0.034711  0.043929  0.041209   
587        0.025880  0.017266  0.029345  0.039132  0.031005  0.034027   
588        0.028166  0.010118  0.030818  0.033645  0.026100  0.032227   
589        0.004174  0.044797  0.032890  0.080341  0.050910  0.043777   
Pass/Fail  0.025141  0.002603  0.000957  0.024623  0.013756  0.016239   

                  7         8         9        10  ...       577       582  \
0          0.031483  0.052622  0.009045  0.006504  ...  0.008601  0.000224   
1          0.012037  0.031258  0.023964  0.009645  ...  0.010145  0.043556   
2          0.146213  0.023528  0.016168  0.069893  ...  0.028705  0.006023   
3          0.073856  0.102892  0.068215  0.049873  ...  0.016438  0.008988   
4          0.347734  0.025946  0.054206  0.006470  ...  0.004070  0.045081   
...             ...       ...       ...       ...  ...       ...       ...   
586        0.058113  0.010433  0.033738  0.000327  ...  0.002684  0.016726   
587        0.021426  0.022845  0.059301  0.046965  ...  0.009405  0.024473   
588        0.020893  0.026250  0.060758  0.046048  ...  0.015596  0.020705   
589        0.107804  0.022770  0.004880  0.008393  ...  0.024766  0.041486   
Pass/Fail  0.012991  0.028016  0.031191  0.033639  ...  0.049633  0.047020   

                583       584       585       586       587       588  \
0          0.023453  0.019907  0.023589  0.018443  0.025880  0.028166   
1          0.002904  0.001264  0.002273  0.009403  0.017266  0.010118   
2          0.015697  0.018225  0.015752  0.025495  0.029345  0.030818   
3          0.025436  0.024736  0.026019  0.034711  0.039132  0.033645   
4          0.001300  0.001597  0.001616  0.043929  0.031005  0.026100   
...             ...       ...       ...       ...       ...       ...   
586        0.002257  0.001605  0.002743  1.000000  0.167913  0.164238   
587        0.002649  0.002498  0.002930  0.167913  1.000000  0.974276   
588        0.002260  0.001957  0.002530  0.164238  0.974276  1.000000   
589        0.003008  0.003295  0.003800  0.486559  0.390813  0.389211   
Pass/Fail  0.005981  0.005419  0.005034  0.004156  0.035391  0.031167   

                589  Pass/Fail  
0          0.004174   0.025141  
1          0.044797   0.002603  
2          0.032890   0.000957  
3          0.080341   0.024623  
4          0.050910   0.013756  
...             ...        ...  
586        0.486559   0.004156  
587        0.390813   0.035391  
588        0.389211   0.031167  
589        1.000000   0.002653  
Pass/Fail  0.002653   1.000000  

[443 rows x 443 columns]
In [7]:
upper = data_matrix.where(np.triu(np.ones(data_matrix.shape), k=1).astype(bool))
print(upper)
            0        1         2         3         4         6         7  \
0         NaN  0.14384  0.004756  0.007613  0.011014  0.002270  0.031483   
1         NaN      NaN  0.005767  0.007568  0.001636  0.025564  0.012037   
2         NaN      NaN       NaN  0.298935  0.095891  0.136225  0.146213   
3         NaN      NaN       NaN       NaN  0.058483  0.685835  0.073856   
4         NaN      NaN       NaN       NaN       NaN  0.074368  0.347734   
...        ..      ...       ...       ...       ...       ...       ...   
586       NaN      NaN       NaN       NaN       NaN       NaN       NaN   
587       NaN      NaN       NaN       NaN       NaN       NaN       NaN   
588       NaN      NaN       NaN       NaN       NaN       NaN       NaN   
589       NaN      NaN       NaN       NaN       NaN       NaN       NaN   
Pass/Fail NaN      NaN       NaN       NaN       NaN       NaN       NaN   

                  8         9        10  ...       577       582       583  \
0          0.052622  0.009045  0.006504  ...  0.008601  0.000224  0.023453   
1          0.031258  0.023964  0.009645  ...  0.010145  0.043556  0.002904   
2          0.023528  0.016168  0.069893  ...  0.028705  0.006023  0.015697   
3          0.102892  0.068215  0.049873  ...  0.016438  0.008988  0.025436   
4          0.025946  0.054206  0.006470  ...  0.004070  0.045081  0.001300   
...             ...       ...       ...  ...       ...       ...       ...   
586             NaN       NaN       NaN  ...       NaN       NaN       NaN   
587             NaN       NaN       NaN  ...       NaN       NaN       NaN   
588             NaN       NaN       NaN  ...       NaN       NaN       NaN   
589             NaN       NaN       NaN  ...       NaN       NaN       NaN   
Pass/Fail       NaN       NaN       NaN  ...       NaN       NaN       NaN   

                584       585       586       587       588       589  \
0          0.019907  0.023589  0.018443  0.025880  0.028166  0.004174   
1          0.001264  0.002273  0.009403  0.017266  0.010118  0.044797   
2          0.018225  0.015752  0.025495  0.029345  0.030818  0.032890   
3          0.024736  0.026019  0.034711  0.039132  0.033645  0.080341   
4          0.001597  0.001616  0.043929  0.031005  0.026100  0.050910   
...             ...       ...       ...       ...       ...       ...   
586             NaN       NaN       NaN  0.167913  0.164238  0.486559   
587             NaN       NaN       NaN       NaN  0.974276  0.390813   
588             NaN       NaN       NaN       NaN       NaN  0.389211   
589             NaN       NaN       NaN       NaN       NaN       NaN   
Pass/Fail       NaN       NaN       NaN       NaN       NaN       NaN   

           Pass/Fail  
0           0.025141  
1           0.002603  
2           0.000957  
3           0.024623  
4           0.013756  
...              ...  
586         0.004156  
587         0.035391  
588         0.031167  
589         0.002653  
Pass/Fail        NaN  

[443 rows x 443 columns]
In [8]:
to_drop = [column for column in upper.columns if any(upper[column] > 0.70)]
print(len(to_drop),to_drop)
241 ['17', '22', '26', '27', '30', '34', '35', '36', '39', '46', '50', '51', '54', '60', '65', '66', '70', '96', '98', '101', '104', '105', '106', '123', '124', '125', '127', '130', '140', '147', '148', '152', '154', '155', '163', '164', '165', '174', '185', '187', '196', '197', '198', '199', '202', '203', '204', '205', '206', '207', '209', '224', '248', '249', '252', '254', '270', '271', '272', '273', '274', '275', '277', '278', '279', '280', '281', '282', '283', '285', '286', '287', '288', '289', '290', '291', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '316', '317', '318', '319', '320', '321', '323', '324', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '359', '360', '361', '362', '363', '365', '366', '367', '368', '376', '377', '386', '387', '388', '389', '390', '391', '392', '393', '405', '406', '407', '408', '409', '410', '411', '412', '413', '415', '416', '417', '420', '421', '424', '425', '426', '427', '428', '429', '430', '431', '434', '435', '436', '437', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '452', '453', '454', '455', '456', '457', '459', '467', '469', '470', '471', '473', '474', '475', '477', '478', '479', '480', '490', '491', '493', '494', '495', '496', '497', '520', '522', '523', '524', '525', '526', '527', '539', '540', '541', '545', '549', '551', '552', '553', '554', '555', '556', '557', '560', '561', '566', '567', '568', '569', '573', '574', '575', '576', '577', '584', '585', '588']
In [9]:
data.drop(to_drop, axis=1, inplace=True)
In [10]:
data.shape
Out[10]:
(1567, 202)

We checked for the multi-collinearity in dataset and removed columns with high correlation

Now, no. features reduced from 443 to 202

Q2 E - Make all relevant modifications on the data using both functional/logical reasoning/assumptions.

In [12]:
data['Pass/Fail'].value_counts(normalize=True)
Out[12]:
Pass/Fail
-1    0.933631
 1    0.066369
Name: proportion, dtype: float64
In [10]:
data['Pass/Fail']=data['Pass/Fail'].replace([-1,1],[0,1])
In [11]:
data['Pass/Fail'].value_counts(normalize=True)
Out[11]:
Pass/Fail
0    0.933631
1    0.066369
Name: proportion, dtype: float64

We have modified target column , we have label encoded the target column and repaced -1 with 0 and 1 with 1

Q3 A - Perform a detailed univariate Analysis with appropriate detailed comments after each analysis.

Performing univariate analysis for 200+ column is diffcult but lets start with descriptive analysis

In [23]:
#Descriptive Statisctics
descriptive_stats = data.describe().T
print(descriptive_stats.head())
    count         mean         std        min         25%        50%  \
0  1567.0  3014.452896   73.480613  2743.2400  2966.66500  3011.8400   
1  1567.0  2495.850231   80.227793  2158.7500  2452.88500  2498.9100   
2  1567.0  2200.547318   29.380932  2060.6600  2181.09995  2200.9556   
3  1567.0  1396.376627  439.712852     0.0000  1083.88580  1287.3538   
4  1567.0     4.197013   56.103066     0.6815     1.01770     1.3171   

         75%        max  
0  3056.5400  3356.3500  
1  2538.7450  2846.4400  
2  2218.0555  2315.2667  
3  1590.1699  3715.0417  
4     1.5296  1114.5366  
In [24]:
def check_distribution(column):
    _, p_value = shapiro(column)

    if set(list(column))== set([0,1]):
        return 'Binomial'
    
    if p_value > 0.05:
        return 'Normal'

    skewness = column.skew()
    if skewness > 0:
        return 'Right Skewed'
    elif skewness < 0:
        return 'Left Skewed'

    unique_ratio = column.nunique() / len(column)
    if unique_ratio < 0.05:
        return 'Uniform'



    return 'Unknown'
In [25]:
data_dist = []

for i in data.columns:
    # print(i)
    data_dist.append([i,check_distribution(data[i])])

# print(set(data_dist))
data_dist = pd.DataFrame(data_dist,columns=["Feature","Distribution"])
In [26]:
data_dist["Distribution"].unique()
Out[26]:
array(['Right Skewed', 'Left Skewed', 'Normal', 'Binomial'], dtype=object)
In [27]:
data.shape
Out[27]:
(1567, 202)
In [28]:
data.columns
Out[28]:
Index(['0', '1', '2', '3', '4', '6', '7', '8', '9', '10',
       ...
       '565', '570', '571', '572', '582', '583', '586', '587', '589',
       'Pass/Fail'],
      dtype='object', length=202)
In [24]:
len(data.columns)
Out[24]:
202
In [30]:
columns_per_row = 5
total_rows = (len(data.columns) + columns_per_row - 1) // columns_per_row
fig, axes = plt.subplots(total_rows, columns_per_row, figsize=(15, total_rows*3))
axes = axes.flatten()
for i, column in enumerate(data.columns):
    sns.kdeplot(data[column], ax=axes[i], fill=True)
    axes[i].set_title(column)
    axes[i].set_xlabel('')
    axes[i].set_ylabel('Density')

for j in range(len(data.columns), total_rows * columns_per_row):
    fig.delaxes(axes[j])

plt.tight_layout()
plt.show()
No description has been provided for this image
In [31]:
data.shape
Out[31]:
(1567, 202)

Insights from KDE plot

Most of the features in the analysis is skewed (left or right skewed) some are Normal and binomial distributed.

Features are gorups into three different distribution by using p value (found using shapiro-wilk test)

1.Normal Distribution (p value > 0.05)

2.Binomial Distribution (value 0/1)

3.Left Skewed Distribution (skew < 0)

4.Right Skewed Distribution (skew > 0)

In [32]:
data_dist["Distribution"].value_counts()
Out[32]:
Distribution
Right Skewed    151
Left Skewed      49
Normal            1
Binomial          1
Name: count, dtype: int64

Overall Distribution Insight

No of Right skewed fearures = 151

No of left skewed feartured = 49

No of normal distributed features = 1

No of binomial distributed features = 1

In [34]:
columns_per_row = 5
total_rows = (len(data.columns) + columns_per_row - 1) // columns_per_row
fig, axes = plt.subplots(total_rows, columns_per_row, figsize=(15, total_rows*3))
axes = axes.flatten()
for i, column in enumerate(data.columns):
    sns.boxplot(data[column], ax=axes[i], fill=True,orient="h")
    axes[i].set_title(column)
    axes[i].set_xlabel('')
    axes[i].set_ylabel('Boxplot')

for j in range(len(data.columns), total_rows * columns_per_row):
    fig.delaxes(axes[j])

plt.tight_layout()
plt.show()
No description has been provided for this image

Insights from Boxplot From the plot, we can infer most of the columns are with outliers, so we can clean and remove outiers from all the columns

In [12]:
for i in data.columns[:-1]:
    q1 = data[i].quantile(0.25)
    q3 = data[i].quantile(0.75)
    iqr = q3 - q1
    
    low = q1 - 1.5 * iqr
    high = q3 + 1.5 * iqr
    
    data.loc[(data[i] < low) | (data[i] > high), i] = data[i].median()
In [13]:
data.shape
Out[13]:
(1567, 202)
In [37]:
columns_per_row = 5
total_rows = (len(data.columns) + columns_per_row - 1) // columns_per_row
fig, axes = plt.subplots(total_rows, columns_per_row, figsize=(15, total_rows*3))
axes = axes.flatten()
for i, column in enumerate(data.columns):
    sns.boxplot(data[column], ax=axes[i], fill=True,orient="h")
    axes[i].set_title(column)
    axes[i].set_xlabel('')
    axes[i].set_ylabel('Boxplot')

for j in range(len(data.columns), total_rows * columns_per_row):
    fig.delaxes(axes[j])

plt.tight_layout()
plt.show()
No description has been provided for this image

Now we have cleaned and removed all outliers form the dataset

Q3 B - Perform bivariate and multivariate analysis with appropriate detailed comments after each analysis.

Lets take subset of sample 10 columns and perform the analysis

In [14]:
data.shape
Out[14]:
(1567, 202)
In [39]:
subset_columns = data.sample(10, axis=1, random_state=42)
In [ ]:
 
In [40]:
subset_columns.corr()
Out[40]:
137 18 41 423 550 170 95 487 433 64
137 1.000000 0.019806 0.026938 0.061175 0.046226 0.009263 0.064987 0.020894 -0.016252 0.004912
18 0.019806 1.000000 0.036318 0.005551 -0.024781 0.012427 -0.008170 0.028692 -0.068229 0.006252
41 0.026938 0.036318 1.000000 0.009183 0.013748 0.045997 0.017352 -0.058258 -0.007330 0.040778
423 0.061175 0.005551 0.009183 1.000000 0.070703 0.070113 0.118962 -0.062499 0.014841 -0.023748
550 0.046226 -0.024781 0.013748 0.070703 1.000000 0.008262 -0.029039 -0.040322 -0.029160 -0.031059
170 0.009263 0.012427 0.045997 0.070113 0.008262 1.000000 0.014416 -0.100964 -0.049580 -0.046293
95 0.064987 -0.008170 0.017352 0.118962 -0.029039 0.014416 1.000000 0.031382 -0.027062 0.043572
487 0.020894 0.028692 -0.058258 -0.062499 -0.040322 -0.100964 0.031382 1.000000 0.032866 0.013466
433 -0.016252 -0.068229 -0.007330 0.014841 -0.029160 -0.049580 -0.027062 0.032866 1.000000 0.034024
64 0.004912 0.006252 0.040778 -0.023748 -0.031059 -0.046293 0.043572 0.013466 0.034024 1.000000
In [41]:
subset_columns.head()
Out[41]:
137 18 41 423 550 170 95 487 433 64
0 75.2 192.3963 4.515 53.6840 12.930000 0.7250 0.0000 0.0000 49.0013 21.7264
1 81.0 191.2872 2.773 61.8918 16.000000 1.0498 0.0001 820.7900 199.7866 19.1927
2 96.5 192.7035 3.080 50.6425 16.160000 1.0824 0.0002 74.0741 109.5747 16.1755
3 123.7 192.1557 3.080 94.4594 17.013313 0.9386 0.0002 71.7583 181.2641 15.6209
4 123.1 191.6037 2.209 85.2255 19.630000 0.5760 -0.0001 587.3773 0.0000 20.0445
In [42]:
subset_columns["target"] = data["Pass/Fail"]
In [43]:
sns.pairplot(subset_columns)
Out[43]:
<seaborn.axisgrid.PairGrid at 0x7f8c467cf100>
No description has been provided for this image

Insights from pairplot

We dont' see any outliers(since we removed), correlation between independent variables (since we removed multi collinearity) and we see some different distribution of data normal, skewed and binomial

In [44]:
correlation_matrix = subset_columns.corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Heatmap of Subset of Columns')
plt.show()
No description has been provided for this image

Insights from heatmap

We dont' see any any high positive or negative correlation between independent variables since we have treated multicollinearity before

Q4 A - Segregate predictors vs target attributes.

In [15]:
X = data.iloc[:,:-1]
Y = data.iloc[:,-1]
In [46]:
X.head()
Out[46]:
0 1 2 3 4 6 7 8 9 10 ... 564 565 570 571 572 582 583 586 587 589
0 3030.93 2564.00 2187.7333 1411.1265 1.3602 97.6133 0.1242 1.5005 0.0162 -0.0034 ... 6.444985 0.14561 533.8500 2.1113 8.95 0.5005 0.0118 0.021458 0.016475 99.670066
1 3095.78 2465.14 2230.4222 1463.6606 0.8294 102.3433 0.1247 1.4966 -0.0005 -0.0148 ... 6.444985 0.14561 535.0164 2.4335 5.92 0.5019 0.0223 0.009600 0.020100 208.204500
2 2932.61 2559.94 2186.4111 1698.0172 1.5102 95.4878 0.1241 1.4436 0.0041 0.0013 ... 6.290000 0.14280 535.0245 2.0293 11.21 0.4958 0.0157 0.020500 0.014800 82.860200
3 2988.72 2479.90 2199.0333 909.7926 1.3204 104.2367 0.1217 1.4882 -0.0124 -0.0033 ... 7.320000 0.16300 530.5682 2.0253 9.33 0.4990 0.0103 0.020200 0.014900 73.843200
4 3032.24 2502.87 2233.3667 1326.5200 1.5334 100.3967 0.1235 1.5031 -0.0031 -0.0072 ... 6.444985 0.14561 532.0155 2.0275 8.83 0.5002 0.0138 0.020200 0.014900 73.843200

5 rows × 201 columns

In [47]:
Y.unique()
Out[47]:
array([0, 1])

Q4 B - Check for target balancing and fix it if found imbalanced.

In [48]:
data["Pass/Fail"].value_counts()/len(data["Pass/Fail"])
Out[48]:
Pass/Fail
0    0.933631
1    0.066369
Name: count, dtype: float64

There is a class imbalance class 0 has nearly 94% of data and class 1 has 6% of data

In [16]:
smote = SMOTE()
X_resampled, y_resampled = smote.fit_resample(X, Y)
In [17]:
y_resampled.value_counts()/len(y_resampled)
Out[17]:
Pass/Fail
0    0.5
1    0.5
Name: count, dtype: float64

Class imbalance has been fixed, each class has equal no of records (50% each)

Q4 C - Perform train-test split and standardize the data or vice versa if required.

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
In [19]:
X_train.shape
Out[19]:
(2340, 201)
In [20]:
X_test.shape
Out[20]:
(586, 201)
In [21]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

Q4 D - Check if the train and test data have similar statistical characteristics when compared with original data.

In [55]:
data_stats = data.describe()
train_stats = X_train.describe()
test_stats = X_test.describe()
In [56]:
print("5 points summary of Orginal Dataset")
data_stats.T
5 points summary of Orginal Dataset
Out[56]:
count mean std min 25% 50% 75% max
0 1567.0 3010.002449 61.721903 2831.9100 2968.1950 3011.8400 3051.8400 3190.9700
1 1567.0 2496.985253 59.577861 2326.5900 2459.8700 2498.9100 2534.3250 2666.0400
2 1567.0 2201.001188 25.681452 2126.6555 2183.0556 2200.9556 2217.5778 2270.2556
3 1567.0 1345.707557 334.300811 711.0258 1084.3779 1287.3538 1551.6947 2347.9092
4 1567.0 1.296507 0.333287 0.6815 1.0177 1.3171 1.4905 2.2449
... ... ... ... ... ... ... ... ...
583 1567.0 0.013891 0.003183 0.0060 0.0116 0.0138 0.0158 0.0237
586 1567.0 0.020574 0.010188 -0.0060 0.0135 0.0205 0.0274 0.0484
587 1567.0 0.015366 0.006558 0.0032 0.0106 0.0148 0.0190 0.0345
589 1567.0 76.511991 45.382032 0.0000 44.3686 72.0230 94.4159 220.0378
Pass/Fail 1567.0 0.066369 0.249005 0.0000 0.0000 0.0000 0.0000 1.0000

202 rows × 8 columns

In [57]:
print("5 points summary of Train Dataset")
train_stats.T
5 points summary of Train Dataset
Out[57]:
count mean std min 25% 50% 75% max
0 2340.0 3004.256469 59.878665 2831.9100 2962.943377 3001.515000 3043.724024 3190.9700
1 2340.0 2496.994359 55.266846 2326.5900 2462.289829 2498.910000 2531.182500 2664.5200
2 2340.0 2199.411263 23.088816 2126.6555 2182.027369 2199.276227 2214.859155 2270.2556
3 2340.0 1331.779702 297.220185 711.0258 1098.632416 1279.477538 1517.650450 2347.9092
4 2340.0 1.282883 0.297298 0.6815 1.048171 1.310100 1.461557 2.2449
... ... ... ... ... ... ... ... ...
582 2340.0 0.500326 0.003155 0.4915 0.498266 0.500367 0.502416 0.5090
583 2340.0 0.013886 0.002892 0.0060 0.011718 0.013615 0.015700 0.0237
586 2340.0 0.021079 0.009524 -0.0060 0.014645 0.020987 0.027500 0.0484
587 2340.0 0.015953 0.006004 0.0032 0.011316 0.015141 0.020089 0.0345
589 2340.0 77.765677 43.191719 0.0000 48.246057 71.850864 96.753113 220.0378

201 rows × 8 columns

In [58]:
print("5 points summary of Test Dataset")
test_stats.T
5 points summary of Test Dataset
Out[58]:
count mean std min 25% 50% 75% max
0 586.0 2997.136603 58.070305 2846.070000 2958.076926 2995.745000 3031.835000 3190.7800
1 586.0 2496.375103 55.154780 2332.390000 2465.442500 2499.004939 2529.951022 2666.0400
2 586.0 2197.888701 22.180319 2134.744500 2181.308935 2197.715888 2213.152317 2256.6000
3 586.0 1343.587745 280.225169 867.302700 1117.189770 1288.668406 1520.972784 2341.7833
4 586.0 1.291169 0.298840 0.728748 1.063290 1.304509 1.484075 2.2328
... ... ... ... ... ... ... ... ...
582 586.0 0.500434 0.003128 0.491500 0.498301 0.500500 0.502400 0.5088
583 586.0 0.013757 0.002806 0.007100 0.011565 0.013683 0.015584 0.0236
586 586.0 0.020937 0.009581 -0.006000 0.014531 0.020900 0.026765 0.0484
587 586.0 0.016083 0.006067 0.004500 0.011600 0.015517 0.019957 0.0339
589 586.0 78.830305 43.848858 0.000000 49.594581 71.653961 96.755252 220.0378

201 rows × 8 columns

In [59]:
# T.iloc[1,:]
summary_comparison = pd.concat([data_stats.T.iloc[1,:], train_stats.T.iloc[1,:], test_stats.T.iloc[1,:]], axis=1)
In [60]:
print("Comparision of 5 points summary of column 1 (sample coln) to check the statistical characteristics")
summary_comparison
Comparision of 5 points summary of column 1 (sample coln) to check the statistical characteristics
Out[60]:
1 1 1
count 1567.000000 2340.000000 586.000000
mean 2496.985253 2496.994359 2496.375103
std 59.577861 55.266846 55.154780
min 2326.590000 2326.590000 2332.390000
25% 2459.870000 2462.289829 2465.442500
50% 2498.910000 2498.910000 2499.004939
75% 2534.325000 2531.182500 2529.951022
max 2666.040000 2664.520000 2666.040000

Orginal dataset, train dataset and test dataset has similar statistical characteristics

Q5 A - Use any Supervised Learning technique to train a model.

In [61]:
dTree = DecisionTreeClassifier(criterion = 'gini', random_state=1)
dTree.fit(X_train, y_train)
Out[61]:
DecisionTreeClassifier(random_state=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier(random_state=1)
In [62]:
print("Train Score",dTree.score(X_train, y_train))
print("Test Score",dTree.score(X_test, y_test))
Train Score 1.0
Test Score 0.8822525597269625

The simple model has 1 as train score and 0.88 as test score

This seems to be overfitted since it perform very good with training set and low with test set

In [63]:
print("Confusion Matrix- Simple Dtree")
y_predict = dTree.predict(X_test)

cm=metrics.confusion_matrix(y_test, y_predict,labels=[0, 1])

df_cm = pd.DataFrame(cm, index = [i for i in ["No","Yes"]],
                  columns = [i for i in ["Predicted No","Predicted Yes"]])
plt.figure(figsize = (7,5))
sns.heatmap(df_cm, annot=True ,fmt='g')
Confusion Matrix- Simple Dtree
Out[63]:
<Axes: >
No description has been provided for this image
In [64]:
predicted_labels_test = dTree.predict(X_test)
print("Test Performance Matrix: (Simple Dtree Model):")
metrics.classification_report(y_test, predicted_labels_test).split("\n")
Test Performance Matrix: (Simple Dtree Model):
Out[64]:
['              precision    recall  f1-score   support',
 '',
 '           0       0.92      0.85      0.88       306',
 '           1       0.85      0.92      0.88       280',
 '',
 '    accuracy                           0.88       586',
 '   macro avg       0.88      0.88      0.88       586',
 'weighted avg       0.89      0.88      0.88       586',
 '']

Simple Model Observation

Class False:

Precision is 0.92

Recall is 0.85

F1-Score is 0.88

Class True:

Precision is 0.85

Recall is 0.92

F1-Score is 0.88

Q5 B - Use cross validation techniques.

In [68]:
kf = KFold(n_splits=10, shuffle=True, random_state=42)
cv_scores = cross_val_score(DecisionTreeClassifier(), X_resampled, y_resampled, cv=kf)
print(cv_scores)
print("\nKfold Cross Validation")
print("Accuracy : %.1f%% (%.1f%%)"%(cv_scores.mean()*100,cv_scores.std()*100))
[0.88395904 0.8668942  0.91467577 0.90443686 0.89761092 0.89419795
 0.91438356 0.88013699 0.90753425 0.89383562]

Kfold Cross Validation
Accuracy : 89.6% (1.5%)
In [71]:
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
cv_scores = cross_val_score(DecisionTreeClassifier(), X_resampled, y_resampled, cv=skf)
print(cv_scores)
print("\nStratified Kfold Cross Validation")
print("Accuracy : %.1f%% (%.1f%%)"%(cv_scores.mean()*100,cv_scores.std()*100))
[0.87372014 0.89761092 0.90443686 0.87372014 0.92832765 0.92832765
 0.90068493 0.91780822 0.90753425 0.90068493]

Stratified Kfold Cross Validation
Accuracy : 90.3% (1.8%)

Have tried 10 fold validation

K-fold validation - score - received accuracy is 89.6% (with 2.6% as standard decviartion from mean)

Stratified K-fold validation - score - received accuracy is 90.3% (with 1.8% as standard decviartion from mean)

Leave one out Cross validation - score - is not good idea to try because it will compute near to 1.5k iterations, this is not good approach and it is mainly use for small dataset

Conclusion

At last Stratified K-fold validation give best accuracy 90.3 % while comapred ti k-fold (89.6%)

Q5 C - Apply hyper-parameter tuning techniques to get the best accuracy.

In [75]:
param_grid = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
In [76]:
grid_search = GridSearchCV(DecisionTreeClassifier(), param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search.fit(X_train, y_train)
Fitting 5 folds for each of 144 candidates, totalling 720 fits
Out[76]:
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [None, 10, 20, 30],
                         'min_samples_leaf': [1, 2, 4],
                         'min_samples_split': [2, 5, 10],
                         'splitter': ['best', 'random']},
             scoring='accuracy', verbose=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [None, 10, 20, 30],
                         'min_samples_leaf': [1, 2, 4],
                         'min_samples_split': [2, 5, 10],
                         'splitter': ['best', 'random']},
             scoring='accuracy', verbose=1)
DecisionTreeClassifier()
DecisionTreeClassifier()
In [77]:
dTreeP_model = grid_search.best_params_
dTreeP_model
Out[77]:
{'criterion': 'entropy',
 'max_depth': 20,
 'min_samples_leaf': 2,
 'min_samples_split': 10,
 'splitter': 'best'}
In [79]:
dTreehp = DecisionTreeClassifier(criterion = 'entropy', max_depth=20,min_samples_leaf=2,min_samples_split=10,splitter = 'best',random_state=1)
dTreehp.fit(X_train, y_train)
Out[79]:
DecisionTreeClassifier(criterion='entropy', max_depth=20, min_samples_leaf=2,
                       min_samples_split=10, random_state=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier(criterion='entropy', max_depth=20, min_samples_leaf=2,
                       min_samples_split=10, random_state=1)
In [80]:
print("Hyper Parametertuning - Train Score",dTreehp.score(X_train, y_train))
print("Hyper Paramtertuning - Test Score",dTreehp.score(X_test, y_test))
Hyper Parametertuning - Train Score 0.9846153846153847
Hyper Paramtertuning - Test Score 0.8856655290102389

Insights from Hyper tuning Score improved from ordinary model

Train : 98.4 (reduced from 100) - overcomed overfitting

Test : 88.5 (improved from 88.2) - improved the score and normalised

Q5 D - Use any other technique/method which can enhance the model performance.

In [81]:
pca = PCA(n_components=10)
pca.fit(X_resampled)
Out[81]:
PCA(n_components=10)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
PCA(n_components=10)
In [82]:
print(pca.explained_variance_)
[13845042.29453119  4541802.83785673  2768144.42643618   539312.54347369
   142388.81973074   112639.00814615   109456.9333967     99141.69763772
    95441.77895265    85710.87218695]
In [83]:
print(pca.components_)
[[ 9.90329021e-04  1.03248746e-03 -7.29179980e-04 ...  2.56145741e-07
  -6.48616005e-08 -9.43401943e-04]
 [-1.03852355e-03  9.62204927e-04  2.19054829e-04 ... -3.55785954e-07
   1.26937856e-07  2.17596664e-03]
 [-5.53705667e-04 -2.67157450e-03  2.06643834e-04 ...  1.01734591e-08
  -1.14168622e-07 -1.26488786e-03]
 ...
 [ 1.74043629e-03  3.46837520e-03  7.70921279e-03 ... -1.65746007e-06
  -5.17496363e-07 -1.08583484e-02]
 [ 1.03642149e-02 -1.76771800e-03  1.17899649e-02 ... -1.80648225e-06
  -9.60361855e-07 -5.86849982e-03]
 [-3.23261415e-03 -1.62442877e-02  1.87092541e-02 ... -1.52065796e-06
  -1.87997688e-07  2.01751417e-03]]
In [84]:
print(pca.explained_variance_ratio_)
[0.6009062  0.19712453 0.12014374 0.02340739 0.00618    0.00488879
 0.00475068 0.00430297 0.00414239 0.00372005]
In [85]:
plt.bar(list(range(1,11)),pca.explained_variance_ratio_,alpha=0.5, align='center')
plt.ylabel('Variation explained')
plt.xlabel('# of PCA Components')
plt.show()
No description has been provided for this image
In [86]:
plt.step(list(range(1,11)),np.cumsum(pca.explained_variance_ratio_), where='mid')
plt.ylabel('Cum of variation explained')
plt.xlabel('# of PCA Components')
plt.show()
No description has been provided for this image

With 4 pca components its explaining more than 90 and near to 95 of variation so we can select 4 components

In [23]:
pca4 = PCA(n_components=4)
pca4.fit(X_resampled)
print(pca4.components_)
print(pca4.explained_variance_ratio_)
Xpca4 = pca4.transform(X_resampled)
[[ 7.64336501e-04  1.17203029e-03 -5.80489754e-04 -2.56582187e-03
   1.70540860e-06 -2.59871166e-05  4.16197498e-08  7.88689037e-07
  -2.64082580e-07  1.98219313e-07  2.65982355e-07  7.61884767e-06
  -7.07161304e-05 -3.19050023e-04 -7.28799973e-06  2.14174132e-05
   4.17927504e-07  1.29765155e-07  3.52747936e-02 -7.97061180e-03
  -1.32297757e-01  1.25600773e-05  1.16181302e-04 -2.98135451e-05
  -1.38543770e-06  4.85565809e-06 -1.36905369e-06 -3.14957736e-06
  -1.28489215e-07  8.74198301e-05  1.60278307e-06 -1.97788690e-05
   4.51498881e-06 -9.11301961e-05  5.11908054e-06  1.08333537e-04
   5.11941460e-08 -4.45144436e-04 -1.39479834e-07 -1.39390239e-08
  -1.46594688e-06 -9.41416597e-06  3.45157716e-06 -1.25092742e-04
   2.58189120e-05 -5.84043790e-05 -1.69101062e-06  4.27960995e-05
  -4.20506148e-06 -0.00000000e+00  1.59863235e-07  2.22153157e-07
  -3.30400656e-07  1.82213354e-06 -1.62346856e-07  2.04830417e-06
   2.47799403e-07 -6.37200375e-08  4.48054658e-06 -4.50693216e-09
   7.85517297e-07  1.84034291e-07 -2.29895313e-05  3.81112223e-07
   6.91569352e-03  7.74962604e-07  2.15139235e-08 -1.56013495e-08
  -1.44192952e-09  1.32779085e-09 -4.99811976e-07  3.92117160e-09
  -6.33038572e-07 -8.37358784e-08 -8.61380606e-07 -3.28687036e-07
   6.66729260e-08 -0.00000000e+00  2.41360469e-04 -1.15554934e-08
  -2.39207415e-05  9.74486634e-08 -6.95266936e-08 -1.56719260e-06
   4.54073728e-07  1.30838357e-05 -1.48735009e-06 -2.85134289e-06
   4.02186792e-06 -1.41291088e-08 -1.98919614e-06  2.00099355e-05
   2.04115974e-05  3.58576366e-04 -1.06046444e-03 -1.08354471e-03
  -8.80796868e-05  5.41393261e-03  2.84015667e-05  4.25331821e-09
  -1.16308434e-07  2.59456367e-07 -6.35878536e-07 -4.68341113e-05
   1.85420254e-05 -1.10752425e-07  2.26293224e-07  1.07797710e-02
  -1.04125442e-02 -4.48335462e-02  9.89249015e-01 -1.33375635e-06
   2.65677999e-05  2.72216023e-06  1.59859118e-05 -1.96364985e-06
   1.35787070e-07  4.85116008e-07  1.50887578e-06 -2.26016926e-06
   8.60267087e-07  3.06482824e-06  9.78887418e-05 -1.14750508e-06
   2.48813794e-05  3.01701396e-05  1.17747210e-06  6.76671280e-06
   5.07152398e-08  6.96373488e-05  6.22478138e-05 -8.31895155e-04
   6.80375934e-08 -3.78011396e-09 -7.14910515e-07 -5.54500269e-07
  -8.10749381e-08 -3.96054082e-07  2.63253638e-07  4.52775312e-07
  -1.68005995e-05  3.16548809e-09 -6.70202487e-07 -2.08514070e-08
   4.17512234e-04 -1.37180136e-03 -9.32683585e-08 -9.97578782e-08
  -1.56830534e-08  2.66443985e-08  1.59647771e-04  1.39036949e-09
   1.09275595e-07 -4.32428852e-06  3.95327157e-07  7.01273933e-05
   1.72491265e-06 -6.08099205e-03 -1.46699598e-03 -1.64651718e-04
  -1.56733179e-03  7.66519735e-03  1.77694940e-03 -8.66587452e-05
   4.17347429e-03  3.04529485e-04 -7.27170363e-05 -4.60122622e-03
   3.32439140e-05 -4.88075412e-05  4.98949320e-03  3.21592772e-03
   4.71815284e-03 -5.40347107e-03  1.93260867e-04 -3.50344138e-03
  -2.67262978e-03 -2.71635437e-04  3.77173221e-04 -0.00000000e+00
   1.19633058e-07  3.58103609e-08 -3.65867069e-09  1.79786428e-07
   1.56209176e-05 -1.03950688e-05 -1.34841433e-05  1.51926547e-07
  -2.09390795e-06  2.36761865e-05 -1.44182988e-06 -3.26856148e-05
   1.92148811e-07 -5.33524240e-05 -1.51452799e-06  2.08768557e-05
   4.26052817e-08 -3.15399703e-08  3.02149897e-07 -5.93335698e-08
  -9.46459812e-04]
 [-8.26633359e-04  9.33610420e-04  2.02711118e-04  6.22533146e-03
  -8.66946064e-06  1.63689236e-04  1.45460064e-08 -6.03122400e-07
   1.43293821e-07  6.89344387e-08 -3.52630667e-07 -2.23128596e-05
  -1.11831423e-04  4.00355805e-04  1.09759549e-05 -2.71895058e-05
  -4.84296571e-07 -1.75814746e-07 -1.48133843e-02 -3.55043301e-02
   6.24565419e-02 -3.67565937e-06 -1.11073568e-04  1.17650565e-05
  -1.00429665e-06  2.32786380e-06  1.63734799e-05  1.58977560e-05
   4.26903349e-06 -1.88931740e-04  1.02089382e-05 -8.52196234e-05
   2.17282572e-07 -3.76992628e-05 -3.19507418e-06 -6.03690577e-05
   1.11587969e-07 -4.08420264e-04  1.49899776e-07  1.02019874e-08
   7.48486536e-07  9.71113352e-05 -2.03117177e-06  1.50834147e-04
  -7.61347045e-05 -1.16632028e-06 -1.33638081e-06 -1.02168784e-04
   6.20762155e-04 -0.00000000e+00 -2.20085357e-07 -1.64520723e-06
  -1.04438386e-06 -1.95344180e-06 -6.95847501e-09  9.88147691e-08
   4.34254071e-07  2.04145626e-07 -2.04403378e-06  5.82458497e-08
   1.68901094e-07 -5.06406326e-07 -4.43693680e-04 -7.09460698e-07
  -6.93313592e-03  2.44287790e-06  1.58241947e-08  4.18196300e-09
  -1.51225040e-09  8.81079141e-10 -2.03117288e-07  3.35930468e-09
  -1.09684822e-06  9.16354137e-08  1.73731373e-06  5.60571506e-08
   3.03813150e-07 -0.00000000e+00 -6.50522719e-04  1.10931233e-08
  -9.79358498e-07  3.82553794e-08 -9.28667931e-09 -6.25963238e-07
   8.99074537e-07  2.25319660e-06 -2.42882018e-06 -3.75210614e-06
  -9.39336713e-07 -3.50814052e-08 -1.15683369e-07  2.43603047e-04
  -1.63788041e-06 -7.71055219e-04 -2.28960877e-04 -4.65344001e-04
   7.38156170e-05  1.55659613e-04  2.77062400e-06  3.90651109e-09
  -1.31663126e-07 -4.25125584e-07 -4.55496732e-07 -1.43575980e-04
   1.30551349e-04  7.62498598e-08 -8.52507481e-07  1.01343704e-02
   7.40810509e-03  9.95396532e-01  5.37692161e-02  4.41763421e-05
  -2.97437134e-07 -7.70265605e-07 -4.26986499e-06 -3.17427957e-06
  -4.95046787e-07 -2.19651703e-07 -1.54698304e-06 -3.29659411e-06
  -3.94779390e-06  4.17418485e-07 -1.08614479e-04 -4.06301616e-06
  -2.81983391e-05  1.32546711e-04  1.10487722e-06  2.57443813e-04
   8.35092464e-07  5.67446216e-05 -8.02676826e-05 -7.56052190e-04
   1.35963457e-06  4.52460265e-07 -1.77966247e-07 -4.87327620e-08
   7.01813285e-07 -6.48910082e-07  8.63538909e-07  1.27402312e-07
   6.61979255e-06  3.05298327e-08 -1.09341612e-07  5.77605700e-08
   7.13021835e-05  5.72577524e-03  6.74797379e-08  3.15981059e-08
   1.37543496e-08  2.10546437e-08  4.33837258e-04 -7.34968946e-10
  -4.82136587e-08 -1.21999713e-06 -1.02500007e-06  2.68290180e-05
  -2.64461010e-05  9.54693380e-04  9.90488306e-03 -9.87673357e-04
   1.48230446e-02 -1.64674649e-03 -1.19768492e-04 -6.11102766e-06
  -3.20894434e-03  1.52657273e-03 -2.11270961e-04 -1.24418921e-03
  -7.73098341e-04 -1.46375199e-03 -1.60712145e-03 -7.36517025e-03
   2.87889475e-04 -6.57042016e-03 -1.58575171e-03 -5.33627996e-03
  -4.96669803e-03  2.73588023e-04  1.15131634e-02 -0.00000000e+00
   6.60752077e-08 -1.95787174e-08  3.89570168e-10  3.53494133e-06
   3.98771626e-05 -5.64391977e-05 -1.67659807e-05 -1.85855515e-07
  -5.97231139e-07  4.89648990e-05 -4.53369123e-08  6.24548899e-05
  -5.36833481e-07 -1.61274101e-05 -6.56815045e-06  1.20638120e-04
   1.03722810e-07 -4.26326895e-08 -2.80396734e-07  1.63925846e-07
   1.96651218e-03]
 [ 1.54332195e-05 -2.59172731e-03  1.22211492e-04 -6.49411442e-03
  -1.56146334e-05  1.14425484e-05 -9.06040868e-09 -9.48230363e-07
  -3.57230685e-07  1.02161218e-07 -4.62604806e-07  7.17691842e-05
   8.80137541e-05 -4.85096025e-05  4.02809644e-06  6.87865937e-05
  -3.23126087e-07  4.45300538e-08 -8.08240895e-02 -1.40127042e-02
   9.84363695e-01 -2.54861651e-05 -6.57802641e-05  8.77115092e-06
   5.37284525e-06  1.91298935e-05  2.23782826e-05  1.69962140e-06
   1.68086141e-05 -3.07091989e-05  8.95934207e-06  2.44920673e-04
  -1.93072561e-06 -1.85669296e-04  4.87779508e-06  5.54760635e-05
  -3.59060832e-07  6.87474955e-04 -5.11987267e-08  1.27572185e-07
   2.77134705e-06 -6.67348743e-06  1.20812434e-05 -4.20665495e-04
   2.61529682e-04  5.72863435e-05  3.07293314e-06  2.40836569e-05
  -1.89226916e-04 -0.00000000e+00  1.36560531e-07 -9.97261644e-07
   2.42135683e-07 -2.55740921e-06  2.70959326e-08 -2.60397621e-07
   1.01993192e-07 -6.91253901e-07  1.00259823e-05 -6.02116574e-08
   1.40796085e-07  7.40269475e-08  8.10111469e-04 -2.76801798e-08
  -5.53343634e-03 -1.10055952e-06  4.57446533e-08  4.45549577e-08
   5.14484981e-09  1.60481710e-09 -1.65072458e-06  7.91162825e-10
  -1.39015895e-06  5.79117565e-08 -1.47162026e-06  1.17057741e-06
  -1.13950892e-07 -0.00000000e+00  1.54281477e-03 -1.87905852e-09
  -1.27277098e-05 -8.46954442e-08 -3.93154722e-08 -2.55245673e-06
   4.30862190e-06  2.78975392e-05 -8.93922399e-06 -6.10214816e-06
   3.45428578e-05 -5.35653573e-08 -8.91432768e-07  2.13584525e-04
  -1.05279813e-04  6.48139631e-04  9.48138840e-04  1.78661048e-03
  -3.40067943e-05 -7.89978153e-03 -2.77448943e-05  2.51538060e-09
  -7.23496790e-07  4.79548120e-07  8.86921096e-08  4.93644087e-05
  -1.15422293e-04  1.38473349e-07 -7.70184126e-07 -1.68801272e-02
   9.43207717e-03 -7.03537120e-02  1.31616889e-01  3.44533344e-05
  -5.01622803e-05 -5.62167106e-06 -2.39507970e-05 -3.29066972e-06
   1.45343839e-06  1.59090355e-06  2.04568530e-06 -2.80839840e-06
   8.85441526e-07 -5.35335431e-06 -3.94173379e-05 -2.17665405e-06
  -1.26007264e-04 -1.58003119e-04  1.68928935e-06  5.57291920e-04
   8.54547713e-08  6.26228715e-05 -1.10293543e-06  2.12275333e-04
   1.17750601e-06  8.52767826e-08  4.07670387e-07  7.95392482e-07
   5.04411151e-07  6.19609531e-08  2.24164823e-07  2.16349895e-07
  -3.05031090e-05 -1.02440409e-08 -2.70527268e-07  8.91984859e-09
   8.94834176e-04 -1.04059511e-02  2.06822991e-08 -1.36432093e-07
   4.55471201e-08  4.77323222e-09 -1.76135369e-03 -6.54129033e-10
  -2.33855986e-08  6.24578643e-06 -6.88719720e-07 -9.86477510e-05
   4.20521234e-05 -9.84101480e-03 -3.10518970e-04 -8.16789329e-05
  -1.78769472e-03 -2.10832222e-02 -1.85792115e-03  4.25788917e-04
   5.53615780e-03 -2.46299482e-03 -3.71317657e-04  1.30532090e-02
  -3.20595999e-03  9.54156000e-03  7.35062893e-03 -8.08056274e-04
  -2.37648679e-03 -1.66148008e-02  3.62774625e-03 -2.74476447e-03
   1.25998104e-02  6.94436040e-04 -3.93591715e-03 -0.00000000e+00
  -5.34556074e-08 -3.93462752e-08  4.07525798e-09  4.11318231e-06
  -1.77564157e-04  1.06624862e-04  3.44409287e-05 -5.65205066e-07
   4.19549192e-06  8.43884967e-05  2.05252170e-06 -4.20610001e-05
   9.70132806e-07 -1.34329013e-06 -3.25685270e-06 -2.62610382e-05
  -1.87383545e-07 -7.51637557e-09 -2.51462851e-08 -1.13497271e-07
  -9.64295713e-04]
 [ 1.09165560e-04  2.33811796e-03  1.90084421e-03  1.67212072e-03
   3.53317119e-05  1.52269614e-04 -7.53161517e-08  1.61979615e-07
  -1.14947866e-06 -4.54959083e-07 -1.76367160e-07  5.58093424e-05
  -1.07464106e-04 -4.17965784e-04 -4.50231316e-05  8.13081157e-05
   8.87283424e-07  3.97222204e-08  4.30146848e-02  9.94919103e-01
   1.92267117e-02  9.37532244e-06 -4.39403265e-04 -2.39771982e-05
   5.63146295e-06  8.35408204e-05  3.38689603e-05  4.44186630e-05
  -1.74227903e-05 -8.54630275e-06 -5.77986542e-05  2.11880157e-04
   1.40828503e-05  2.39578744e-04 -1.97473082e-05  3.60605466e-05
   8.21008893e-07  2.48817849e-03 -4.11822824e-07  2.96668792e-07
  -1.74964310e-06 -7.19768818e-05  4.58366304e-06 -7.20899086e-05
  -4.68688740e-04 -1.61735823e-04 -5.19060741e-06  4.06674704e-04
   1.28962069e-04 -0.00000000e+00  3.44497962e-08 -2.08148250e-06
   8.39450532e-07  1.15341413e-06 -7.68760205e-07  9.22325561e-07
   6.67913243e-07  2.30970969e-06  2.64919143e-05  1.24316135e-07
   1.16540916e-06  3.53794448e-07 -9.15154180e-04 -1.36932119e-06
   1.81017075e-03  1.51291540e-06 -1.75974219e-07  9.83567961e-08
   1.54068176e-08  1.06073031e-09  1.33580432e-08  4.03704696e-09
   6.46122919e-06 -1.81728478e-07  1.09592803e-06  3.36860427e-07
   1.08094193e-07 -0.00000000e+00 -1.72804594e-04 -3.05394416e-08
  -2.66156377e-05  2.89473437e-07 -1.24488496e-06  1.16706759e-05
  -1.43731478e-05 -5.98912712e-05  1.27717258e-05  1.01955584e-05
  -5.03002070e-06 -1.63126618e-08  2.53465423e-06 -1.05536747e-03
  -4.89936866e-05 -1.74982567e-03  2.93849152e-03 -1.84699276e-03
   1.65838703e-03  1.71883508e-02  7.54342108e-05  6.20236407e-08
  -6.06469263e-07 -9.10767293e-07  1.22990332e-07  4.08571895e-05
   3.35487078e-05  3.10990207e-07 -3.36133937e-07 -1.26298012e-02
  -7.55945365e-03  3.44842587e-02  1.03809465e-02  9.33415824e-05
  -4.05889802e-06 -2.49597221e-06 -1.60112585e-05 -2.55046233e-05
   1.27121065e-08  1.97048727e-06  3.26704164e-06 -2.04578532e-06
  -1.67909086e-05  5.65127850e-06 -1.51426022e-04 -7.98220363e-06
   1.94764384e-04 -7.12216390e-04 -6.38577151e-06 -1.86286186e-03
   1.21381547e-06 -4.27237015e-04  1.35102148e-04  3.80881267e-04
   7.29433983e-08  1.48861148e-06  2.57126378e-06  2.35574281e-06
   1.71621323e-06 -1.03113647e-07 -2.22226027e-06 -7.89950179e-07
  -3.05362849e-05 -1.95607002e-08 -1.78783957e-06 -1.79068883e-08
  -3.62936800e-03  1.28678907e-02 -8.33687039e-08  8.41277370e-10
  -2.53921824e-08 -8.17935426e-08  1.48756671e-03  2.87367368e-08
   4.23097892e-07 -2.09496575e-05  1.21207556e-06  1.28592166e-04
   8.27919948e-06  2.49142043e-02 -4.78303714e-02  3.85390674e-04
   1.49767593e-02  7.70506889e-03  1.74994345e-04 -6.34475212e-04
   1.28442928e-03  3.23875667e-04  1.49898410e-03 -1.74575497e-02
   4.55747810e-03  1.42067666e-03 -2.17210437e-03  1.50281614e-02
   2.07856554e-02 -1.56426075e-02  1.25966174e-02  2.23993964e-03
   2.02532004e-02 -1.49999407e-03  2.93990747e-02 -0.00000000e+00
   1.20232366e-07  6.13476181e-08 -2.31057638e-09 -1.52582924e-05
   1.50498058e-04 -2.65084411e-04  2.99961864e-04 -3.51119470e-07
   9.77996970e-06  4.73783405e-05  2.17273026e-06 -3.49842703e-05
   1.69888313e-06 -6.96002787e-05  1.09836456e-05  1.98647585e-04
  -1.65025652e-07 -2.71493398e-07  2.01806742e-07  1.43136046e-07
   2.68698951e-03]]
[0.60194532 0.19629934 0.12039764 0.02308566]
In [24]:
Xpca4
Out[24]:
array([[-2702.43927159, -2053.86145796,   750.79941754,  -209.41424788],
       [  531.67099904, -2832.15824603, -1183.42548716,   284.27480282],
       [ -618.68608676, -2443.58831873, -1617.64958583,  -266.93710952],
       ...,
       [-1628.52791777,  -125.79916728,   256.94216287, -1235.76293333],
       [ 8655.77517222,   327.98741341,  2444.22611374,    62.07592017],
       [-1189.91363345, -2603.24192423,  -981.99571763,   288.82852164]])
In [25]:
sns.pairplot(pd.DataFrame(Xpca4))
Out[25]:
<seaborn.axisgrid.PairGrid at 0x7f84af5c29e0>
No description has been provided for this image
In [26]:
X_train_pca, X_test_pca, y_train_pca, y_test_pca = train_test_split(Xpca4, y_resampled, test_size=0.2, random_state=42,stratify=y_resampled)
In [53]:
dTree_p = DecisionTreeClassifier(criterion = 'gini', random_state=1)
dTree_p.fit(X_train_pca, y_train_pca)
print("Train Score",dTree_p.score(X_train_pca, y_train_pca))
print("Test Score",dTree_p.score(X_test_pca, y_test_pca))
Train Score 1.0
Test Score 0.8344709897610921
In [69]:
param_grid = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [5, 10, 15,20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
grid_search_p = GridSearchCV(DecisionTreeClassifier(), param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search_p.fit(X_train_pca, y_train_pca)
Fitting 5 folds for each of 144 candidates, totalling 720 fits
Out[69]:
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [5, 10, 15, 20],
                         'min_samples_leaf': [1, 2, 4],
                         'min_samples_split': [2, 5, 10],
                         'splitter': ['best', 'random']},
             scoring='accuracy', verbose=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [5, 10, 15, 20],
                         'min_samples_leaf': [1, 2, 4],
                         'min_samples_split': [2, 5, 10],
                         'splitter': ['best', 'random']},
             scoring='accuracy', verbose=1)
DecisionTreeClassifier()
DecisionTreeClassifier()
In [70]:
dTreeP_model = grid_search_p.best_params_
dTreeP_model
Out[70]:
{'criterion': 'entropy',
 'max_depth': 20,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'splitter': 'best'}
In [71]:
dTreehp_p = DecisionTreeClassifier(criterion = 'entropy', max_depth=20,min_samples_leaf=1,min_samples_split=2,splitter = 'best',random_state=1)
dTreehp_p.fit(X_train_pca, y_train_pca)
print("Hyper Parametertuning (with PCA) - Train Score",dTreehp_p.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) - Test Score",dTreehp_p.score(X_test_pca, y_test_pca))
Hyper Parametertuning (with PCA) - Train Score 0.9987179487179487
Hyper Paramtertuning (with PCA) - Test Score 0.8225255972696246

Q5 E - Display and explain the classification report in detail.

In [73]:
print("Confusion Matrix- PCA HP")
y_predict = dTreehp_p.predict(X_test_pca)

cm=metrics.confusion_matrix(y_test_pca, y_predict,labels=[0, 1])

df_cm = pd.DataFrame(cm, index = [i for i in ["No","Yes"]],
                  columns = [i for i in ["Predicted No","Predicted Yes"]])
plt.figure(figsize = (7,5))
sns.heatmap(df_cm, annot=True ,fmt='g')
Confusion Matrix- PCA HP
Out[73]:
<Axes: >
No description has been provided for this image
In [75]:
predicted_labels_test = dTreehp_p.predict(X_test_pca)
print("Test Performance Matrix: (PCA HP Dtree Model):")
metrics.classification_report(y_test_pca, predicted_labels_test).split("\n")
Test Performance Matrix: (PCA HP Dtree Model):
Out[75]:
['              precision    recall  f1-score   support',
 '',
 '           0       0.83      0.81      0.82       293',
 '           1       0.81      0.84      0.82       293',
 '',
 '    accuracy                           0.82       586',
 '   macro avg       0.82      0.82      0.82       586',
 'weighted avg       0.82      0.82      0.82       586',
 '']

PCA Model with Hypertuning Observation

Class False:

Precision is 0.83

Recall is 0.81

F1-Score is 0.82

Class True:

Precision is 0.81

Recall is 0.84

F1-Score is 0.82

Q5 F - Apply the above steps for all possible models that you have learnt so far.

Lets start with Logistic Regression model with hyperparamter tuning

In [27]:
param_grid = {
    'penalty': ['l1', 'l2'],
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]
}
clf = LogisticRegression(solver='liblinear')
grid_search_log = GridSearchCV(clf, param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search_log.fit(X_train_pca, y_train_pca)
best_params = grid_search_log.best_params_
best_accuracy = grid_search_log.best_score_
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)
print("Hyper Parametertuning (with PCA) (Log reg) - Train Score",grid_search_log.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) (Log reg) - Test Score",grid_search_log.score(X_test_pca, y_test_pca))
Fitting 5 folds for each of 14 candidates, totalling 70 fits
Best Hyperparameters: {'C': 10, 'penalty': 'l1'}
Best Accuracy: 0.5465811965811966
Hyper Parametertuning (with PCA) (Log reg) - Train Score 0.5444444444444444
Hyper Paramtertuning (with PCA) (Log reg) - Test Score 0.5511945392491467

Naive bayers (We don't have hyper paramter here that we haven't tried)

In [28]:
navb = GaussianNB()
navb.fit(X_train_pca, y_train_pca)
print(" (with PCA) (Naive bayers) - Train Score",navb.score(X_train_pca, y_train_pca))
print(" (with PCA) (Naive bayers) - Test Score",navb.score(X_test_pca, y_test_pca))
 (with PCA) (Naive bayers) - Train Score 0.5478632478632479
 (with PCA) (Naive bayers) - Test Score 0.5853242320819113

lets try for KNN with hyper paramter tuning

In [59]:
param_grid = {'n_neighbors': np.arange(3, 50, 2)}

grid_search_knn = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search_knn.fit(X_train_pca, y_train_pca)
best_params = grid_search_knn.best_params_
best_accuracy = grid_search_knn.best_score_
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)
print("Hyper Parametertuning (with PCA) (KNN) - Train Score",grid_search_knn.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) (KNN) - Test Score",grid_search_knn.score(X_test_pca, y_test_pca))
Fitting 5 folds for each of 24 candidates, totalling 120 fits
Best Hyperparameters: {'n_neighbors': 3}
Best Accuracy: 0.8371794871794872
Hyper Parametertuning (with PCA) (KNN) - Train Score 0.911965811965812
Hyper Paramtertuning (with PCA) (KNN) - Test Score 0.8191126279863481

Now, lets try for SVM with hyper parameter tuning

In [54]:
param_grid = {'C': [0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
               'kernel' : ["rbf"]}

rand_search_svm = RandomizedSearchCV(SVC(), param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1,n_iter=10)
rand_search_svm.fit(X_train_pca, y_train_pca)
best_params = rand_search_svm.best_params_
best_accuracy = rand_search_svm.best_score_
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)
print("Hyper Parametertuning (with PCA) (SVM) - Train Score",rand_search_svm.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) (SVM) - Test Score",rand_search_svm.score(X_test_pca, y_test_pca))
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best Hyperparameters: {'kernel': 'rbf', 'gamma': 0.0001, 'C': 1}
Best Accuracy: 0.7576923076923077
Hyper Parametertuning (with PCA) (SVM) - Train Score 0.997008547008547
Hyper Paramtertuning (with PCA) (SVM) - Test Score 0.7713310580204779

Now, lets try for bagging with hyper parameter tuning

In [41]:
param_grid = {
    'base_estimator__max_depth': [2, 3, 4, 5],
    'n_estimators': [10, 50, 100, 200]
}
clf = BaggingClassifier(base_estimator=DecisionTreeClassifier(), random_state=42)

grid_search_bag = GridSearchCV(clf, param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search_bag.fit(X_train_pca, y_train_pca)
best_params = grid_search_bag.best_params_
best_accuracy = grid_search_bag.best_score_
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)
print("Hyper Parametertuning (with PCA) (bagging) - Train Score",grid_search_bag.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) (bagging) - Test Score",grid_search_bag.score(X_test_pca, y_test_pca))
Fitting 5 folds for each of 16 candidates, totalling 80 fits
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
Best Hyperparameters: {'base_estimator__max_depth': 5, 'n_estimators': 200}
Best Accuracy: 0.6790598290598291
Hyper Parametertuning (with PCA) (bagging) - Train Score 0.7329059829059829
Hyper Paramtertuning (with PCA) (bagging) - Test Score 0.6911262798634812

Now, lets try for Ada Boosting with hyper parameter tuning

In [42]:
param_grid = {
    'base_estimator__max_depth': [2, 3, 4, 5],
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1.0]
}
clf = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(), random_state=42)
grid_search_ada = GridSearchCV(clf, param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search_ada.fit(X_train_pca, y_train_pca)
best_params = grid_search_ada.best_params_
best_accuracy = grid_search_ada.best_score_
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)
print("Hyper Parametertuning (with PCA) (Ada boosting) - Train Score",grid_search_ada.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) (Ada boosting) - Test Score",grid_search_ada.score(X_test_pca, y_test_pca))
Fitting 5 folds for each of 36 candidates, totalling 180 fits
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.
  warnings.warn(
Best Hyperparameters: {'base_estimator__max_depth': 5, 'learning_rate': 1.0, 'n_estimators': 200}
Best Accuracy: 0.8414529914529915
Hyper Parametertuning (with PCA) (Ada boosting) - Train Score 1.0
Hyper Paramtertuning (with PCA) (Ada boosting) - Test Score 0.8447098976109215

Now, lets try for Gradient Boosting with hyper parameter tuning

In [48]:
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1.0],
    'max_depth': [3, 4, 5],
    'subsample': [0.8, 0.9, 1.0], 
    'min_samples_split': [2, 5, 10]
}
rapid_search_gra = RandomizedSearchCV(GradientBoostingClassifier(random_state=42), param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1,n_iter=30)
rapid_search_gra.fit(X_train_pca, y_train_pca)
best_params = rapid_search_gra.best_params_
best_accuracy = rapid_search_gra.best_score_
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)
print("Hyper Parametertuning (with PCA) (Gradient boosting) - Train Score",rapid_search_gra.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) (Gradient boosting) - Test Score",rapid_search_gra.score(X_test_pca, y_test_pca))
Fitting 5 folds for each of 30 candidates, totalling 150 fits
Best Hyperparameters: {'subsample': 0.9, 'n_estimators': 200, 'min_samples_split': 2, 'max_depth': 5, 'learning_rate': 0.1}
Best Accuracy: 0.8170940170940172
Hyper Parametertuning (with PCA) (Gradient boosting) - Train Score 0.9931623931623932
Hyper Paramtertuning (with PCA) (Gradient boosting) - Test Score 0.8156996587030717

Now, lets try for Random Forest with hyper parameter tuning

In [57]:
param_grid = {
    'n_estimators': randint(50, 200),
    'max_depth': [None] + list(range(5, 20)),
    'min_samples_split': randint(2, 11), 
    'min_samples_leaf': randint(1, 5)
}

rand_search_rand = RandomizedSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1,n_iter=20)
rand_search_rand.fit(X_train_pca, y_train_pca)
best_params = rand_search_rand.best_params_
best_accuracy = rand_search_rand.best_score_
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)
print("Hyper Parametertuning (with PCA) (Random Forest) - Train Score",rand_search_rand.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) (Random Forest) - Test Score",rand_search_rand.score(X_test_pca, y_test_pca))
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best Hyperparameters: {'max_depth': 17, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 194}
Best Accuracy: 0.8602564102564102
Hyper Parametertuning (with PCA) (Random Forest) - Train Score 0.9914529914529915
Hyper Paramtertuning (with PCA) (Random Forest) - Test Score 0.8515358361774744

Q6 A- Display and compare all the models designed with their train and test accuracies.

image.png

Q6 B - Select the final best trained model along with your detailed comments for selecting this model.

The best model from the comparision is Random forest

with train score = 0.99

and test score = 0.85

This will be th best performing model from the comparision , we can use this in production usage

Q6 C - Pickle the selected model for future use.

In [61]:
with open('prod_model_rand_forest.pkl', 'wb') as f:
    pickle.dump(rand_search_rand, f)

Q6 D - Write your conclusion on the results.

We have performed several activities in this project , from cleaning the data, feature scaling (feature elimination for multicolinearity in independent features), reduced dimensions by using PCA (principal component analysis), tried several different supervised algorithm with hyper paramter tuning for each (except Naive algo) and select the best model as random forest based on its score and performance and saved(pickled) for use in future (production). This project gives very detailes analysis in each phase from feature scaling to model tuning.